#### Combine municipality-level calibrations within hierarchical Bayesian setup

### Prepare environment

setwd("~/research/coffee-dataverse/src")

do.origspec <- T # For determining priors
source("load.R", encoding="iso-8859-1")

output.suffix <- "-eq24-orig"
priorscale <- 1
do.variety <- NA # 'arabica'
do.prior <- T

library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
library(MASS)
library(data.table)
library(dplyr)
library(lfe)
library(splines)

### Load the muni-level data

allfits <- read.csv(paste0("../data/allfits", output.suffix, ".csv"))
sumstats <- read.csv(paste0("../data/sumstats", output.suffix, ".csv"))

if (is.na(do.variety)) {
    mod.base <- felm(as.formula(paste("logyield ~", mainspec.rhs, "| 0 | Munic�pio + year")), data=df)
} else {
    ## Classify according to portion arabica
    df.variety <- df %>% group_by(Munic�pio) %>% summarize(arabica.portion=mean(ifelse(is.na(arabica.produce), ifelse(is.na(robusta.produce), NA, 0), arabica.produce) / total.produce, na.rm=T), total.average=mean(total.produce, na.rm=T))

    ## Redefine output.suffix
    output.suffix <- paste0('-', do.variety, output.suffix)

    if (do.variety == 'arabica') {
        mod.base <- felm(as.formula(paste("logyield ~", mainspec.rhs, "| 0 | Munic�pio + year")), data=subset(df, Munic�pio %in% df.variety$Munic�pio[df.variety$arabica.portion == 1]))
        allfits <- subset(allfits, region %in% df.variety$Munic�pio[df.variety$arabica.portion == 1])
    } else if (do.variety == 'robusta') {
        mod.base <- felm(as.formula(paste("logyield ~", mainspec.rhs, "| 0 | Munic�pio + year")), data=subset(df, Munic�pio %in% df.variety$Munic�pio[df.variety$arabica.portion == 0]))
        allfits <- subset(allfits, region %in% df.variety$Munic�pio[df.variety$arabica.portion == 0])
    }
}

source("load-single.R")

## Bayesian model code

stan.model.optim <- "
data {
  int<lower=0> K; // coefficients
  int<lower=0> J; // regions
  int<lower=0> L; // group predictors
  real<lower=0> nu[J]; // dof of t-dist

  vector[K] yy[J]; // estimated parameter values
  cov_matrix[K] Sigma[J]; // VCV across parameter values

  matrix[J, L] uu;

  matrix[L, K] mu_priors;
  matrix<lower=0>[L, K] mu_priors_se;
}
parameters {
  matrix[K, J] zz; // standardized true params
  cholesky_factor_corr[K] L_Omega;
  vector<lower=0, upper=pi()/2>[K] tau_unif;
  matrix[L, K] gamma;
}
transformed parameters {
  vector<lower=0>[K] tau = 2.5 * tan(tau_unif);
  matrix[J, K] beta = uu * gamma + (diag_pre_multiply(tau, L_Omega) * zz)';
}
model {
  to_vector(zz) ~ std_normal();
  L_Omega ~ lkj_corr_cholesky(2);
  for (ll in 1:L)
    gamma[ll] ~ normal(mu_priors[ll], mu_priors_se[ll]);
  for (jj in 1:J)
    yy[jj] ~ multi_student_t(nu[jj], beta[jj], Sigma[jj]);
}
"

stan.model.optim.noprior <- "
data {
  int<lower=0> K; // coefficients
  int<lower=0> J; // regions
  int<lower=0> L; // group predictors
  real<lower=0> nu[J]; // dof of t-dist

  vector[K] yy[J]; // estimated parameter values
  cov_matrix[K] Sigma[J]; // VCV across parameter values

  matrix[J, L] uu;
}
parameters {
  matrix[K, J] zz; // standardized true params
  cholesky_factor_corr[K] L_Omega;
  vector<lower=0, upper=pi()/2>[K] tau_unif;
  matrix[L, K] gamma;
}
transformed parameters {
  vector<lower=0>[K] tau = 2.5 * tan(tau_unif);
  matrix[J, K] beta = uu * gamma + (diag_pre_multiply(tau, L_Omega) * zz)';
}
model {
  to_vector(zz) ~ std_normal();
  L_Omega ~ lkj_corr_cholesky(2);
  for (jj in 1:J)
    yy[jj] ~ multi_student_t(nu[jj], beta[jj], Sigma[jj]);
}
"

## Prepare inputs

masterses <- apply(mcmc, 2, function(x) sd(x, na.rm=T))

allmus <- matrix(NA, length(uniregs), ncol(mcmc))
allses <- matrix(NA, length(uniregs), ncol(mcmc))
allnotes <- matrix(NA, length(uniregs), ncol(mcmc))
for (rr in 1:length(uniregs)) {
    print(uniregs[rr])

    for (cc in 1:ncol(mcmc)) {
        note <- 0

        dof <- floor(mean(sumstats$neff[sumstats$region == uniregs[rr] & sumstats$param == names(mcmc)[cc]]))
        if (is.na(dof)) {
            dof <- floor(mean(sumstats$neff[sumstats$region == uniregs[rr]], na.rm=T))
            note <- note + 1
        }

        fitted <- tryCatch({
            fitdistr(mcmc[regions == uniregs[rr], cc], "t", df=dof)$estimate
        }, error=function(e) {
            note <- note + 2
            c(mean(mcmc[regions == uniregs[rr], cc], na.rm=T), sd(mcmc[regions == uniregs[rr], cc], na.rm=T))
        })
        allmus[rr, cc] <- fitted[1]
        if (is.na(fitted[2])) {
            note <- note + 4
            allses[rr, cc] <- masterses[cc]
        } else
            allses[rr, cc] <- fitted[2]

        allnotes[rr, cc] <- note
    }
}
if (is.na(do.variety))
    save(allmus, allses, allnotes, file=paste0("../data/allvals", output.suffix, ".RData"))
## load(paste0("../data/allvals", output.suffix, ".RData"))

allses[allses == 0] <- NA

mu.priors <- matrix(NA, 1, ncol(mcmc))
se.priors <- matrix(NA, 1, ncol(mcmc))
resid.sds <- rep(NA, ncol(mcmc))
for (cc in 1:ncol(mcmc)) {
    mu.priors[, cc] <- sum(allmus[, cc] / allses[, cc]^2, na.rm=T) / sum(!is.na(allmus[, cc]) / allses[, cc]^2, na.rm=T)
    se.priors[, cc] <- sqrt(sum((allmus[, cc] - mu.priors[, cc])^2 / allses[, cc]^2, na.rm=T) / sum(!is.na(allmus[, cc]) / allses[, cc]^2, na.rm=T))
    resid.sds[cc] <- sd(allmus[, cc] - mu.priors[, cc], na.rm=T)
}
print(rbind(mu.priors, se.priors))

## This drops any regions that had extreme values before
allvcv <- array(NA, c(length(uniregs), ncol(mcmc), ncol(mcmc)))
for (rr in 1:length(uniregs)) {
    print(uniregs[rr])

    cors <- cor(mcmc[regions == uniregs[rr],])

    ## Stop long-tailed correlation with death_coeff1
    cors[which(names(mcmc) == 'death_coeff1'), ] <- 0
    cors[, which(names(mcmc) == 'death_coeff1')] <- 0
    cors[which(names(mcmc) == 'death_coeff1'), which(names(mcmc) == 'death_coeff1')] <- 1

    cors[is.na(cors)] <- 0 # for variables with no info
    covs <- diag(allses[rr,]) %*% cors %*% diag(allses[rr,])

    allvcv[rr, , ] <- (covs + t(covs)) / 2 # ensure symmetry
    if (priorscale != Inf)
        allvcv[rr, , ] <- allvcv[rr, , ] + diag(resid.sds / priorscale)
}

## Drop FEs

fes <- grep("_fe", names(mcmc))

sumstats$region <- factor(sumstats$region, levels=uniregs)
dofdf <- sumstats %>% group_by(region) %>% dplyr::summarize(dof=mean(neff[param %in% names(mcmc)[-fes]], na.rm=T), Rhat=mean(Rhat[param %in% names(mcmc)[-fes]]))
dofdf$valid <- !is.nan(dofdf$dof) & rowSums(is.na(allmus)) == 0 & !is.na(allvcv[, 1, 1]) & !is.na(allvcv[, 13, 13])
dofdf$valid[unique(which(!is.finite(allvcv), arr.ind=T)[, 1])] <- F

library(matrixcalc)
for (ii in which(dofdf$valid)) {
    if (!is.positive.definite(allvcv[ii, -fes, -fes]))
        print(ii)
}

write.csv(dofdf[dofdf$valid,], paste0("../data/combined-muniinfo", output.suffix, ".csv"))

## Replace mu.priors and se.priors for OLS model results

mu.priors[grep("yield_coeff", names(mcmc))] <- mod.base$coeff[1:length(weatherpreds)]
se.priors[grep("yield_coeff", names(mcmc))] <- mod.base$cse[1:length(weatherpreds)]

## Fit the model

stan.data <- list(K = ncol(mcmc) - length(fes),
                  J = sum(dofdf$valid), L = 1,
                  nu = dofdf$dof[dofdf$valid],
                  yy = allmus[dofdf$valid, -fes, drop=F],
                  Sigma = allvcv[dofdf$valid, -fes, -fes, drop=F],
                  uu = matrix(1, sum(dofdf$valid), 1),
                  mu_priors = matrix(mu.priors[1, -fes], 1, length(mu.priors) - length(fes)),
                  mu_priors_se = matrix(se.priors[1, -fes] * priorscale, 1, length(mu.priors) - length(fes)))

if (do.prior) {
    fit <- stan(model_code=stan.model.optim, data=stan.data,
                iter = 2000, chains = 8)
} else {
    fit <- stan(model_code=stan.model.optim.noprior, data=stan.data,
                iter = 2000, chains = 8)
}

la <- extract(fit, c('gamma', 'beta', 'tau'), permute=T)
if (do.prior) {
    save(la, file=paste0("../data/combined-betagamma", output.suffix, ".RData"))
} else {
    save(la, file=paste0("../data/combined-betagamma", output.suffix, "-noprior.RData"))
}
